sessionInfo() #provides list of loaded packages and version of R.
## R version 4.3.2 (2023-10-31)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.0
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: America/New_York
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## loaded via a namespace (and not attached):
## [1] digest_0.6.35 R6_2.5.1 fastmap_1.1.1 xfun_0.43
## [5] cachem_1.0.8 knitr_1.45 htmltools_0.5.8.1 rmarkdown_2.26
## [9] lifecycle_1.0.4 cli_3.6.2 sass_0.4.9 jquerylib_0.1.4
## [13] compiler_4.3.2 rstudioapi_0.16.0 tools_4.3.2 evaluate_0.23
## [17] bslib_0.7.0 yaml_2.3.8 rlang_1.1.3 jsonlite_1.8.8
#BiocManager::install("simplifyEnrichment")
First, load the necessary packages.
library(goseq)
## Loading required package: BiasedUrn
## Loading required package: geneLenDataBase
##
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(forcats)
library(ggplot2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(tidyr)
library(grDevices)
library(reshape2)
##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(Rmisc)
## Loading required package: lattice
## Loading required package: plyr
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
library(ggpubr)
##
## Attaching package: 'ggpubr'
## The following object is masked from 'package:plyr':
##
## mutate
library(tibble)
library(gridExtra)
library(tidyr)
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(ComplexHeatmap)
## Loading required package: grid
## ========================================
## ComplexHeatmap version 2.16.0
## Bioconductor page: http://bioconductor.org/packages/ComplexHeatmap/
## Github page: https://github.com/jokergoo/ComplexHeatmap
## Documentation: http://jokergoo.github.io/ComplexHeatmap-reference
##
## If you use it in published research, please cite either one:
## - Gu, Z. Complex Heatmap Visualization. iMeta 2022.
## - Gu, Z. Complex heatmaps reveal patterns and correlations in multidimensional
## genomic data. Bioinformatics 2016.
##
##
## The new InteractiveComplexHeatmap package can directly export static
## complex heatmaps into an interactive Shiny app with zero effort. Have a try!
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(ComplexHeatmap))
## ========================================
library(circlize)
## ========================================
## circlize version 0.4.16
## CRAN page: https://cran.r-project.org/package=circlize
## Github page: https://github.com/jokergoo/circlize
## Documentation: https://jokergoo.github.io/circlize_book/book/
##
## If you use it in published research, please cite:
## Gu, Z. circlize implements and enhances circular visualization
## in R. Bioinformatics 2014.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(circlize))
## ========================================
library(GSEABase)
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following object is masked from 'package:gridExtra':
##
## combine
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: annotate
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: IRanges
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:plyr':
##
## rename
## The following object is masked from 'package:tidyr':
##
## expand
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following object is masked from 'package:geneLenDataBase':
##
## unfactor
## The following object is masked from 'package:utils':
##
## findMatches
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
##
## Attaching package: 'IRanges'
## The following object is masked from 'package:plyr':
##
## desc
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
##
## Attaching package: 'AnnotationDbi'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: XML
## Loading required package: graph
##
## Attaching package: 'graph'
## The following object is masked from 'package:XML':
##
## addNode
## The following object is masked from 'package:circlize':
##
## degree
## The following object is masked from 'package:plyr':
##
## join
library(data.table)
##
## Attaching package: 'data.table'
## The following object is masked from 'package:IRanges':
##
## shift
## The following objects are masked from 'package:S4Vectors':
##
## first, second
## The following objects are masked from 'package:zoo':
##
## yearmon, yearqtr
## The following objects are masked from 'package:reshape2':
##
## dcast, melt
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(stringr)
##
## Attaching package: 'stringr'
## The following object is masked from 'package:graph':
##
## boundary
library(GenomicRanges)
## Loading required package: GenomeInfoDb
library(rtracklayer)
library(rrvgo)
library(simplifyEnrichment)
##
## ========================================
## simplifyEnrichment version 1.10.0
## Bioconductor page: https://bioconductor.org/packages/simplifyEnrichment/
## Github page: https://github.com/jokergoo/simplifyEnrichment
## Documentation: https://jokergoo.github.io/simplifyEnrichment/
## Examples: https://simplifyenrichment.github.io/
##
## If you use it in published research, please cite:
## Gu, Z. simplifyEnrichment: an R/Bioconductor package for Clustering and
## Visualizing Functional Enrichment Results, Genomics, Proteomics &
## Bioinformatics 2022.
##
## This message can be suppressed by:
## suppressPackageStartupMessages(library(simplifyEnrichment))
## ========================================
library(rtracklayer)
gff<-rtracklayer::import("../../data/Pocillopora_acuta_HIv2.genes_fixed.gff3")
gff<-as.data.frame(gff) %>% dplyr::select(-Parent)
dim(gff) # 478988 9
## [1] 478988 12
names(gff)
## [1] "seqnames" "start" "end" "width"
## [5] "strand" "source" "type" "score"
## [9] "phase" "ID" "transcript_id" "gene_id"
transcripts <- subset(gff, type == "transcript")
transcripts_gr <- makeGRangesFromDataFrame(transcripts, keep.extra.columns=TRUE) #extract length information
transcript_lengths <- width(transcripts_gr) #isolate length of each gene
seqnames<-transcripts_gr$ID #extract list of gene id
lengths<-cbind(seqnames, transcript_lengths)
lengths<-as.data.frame(lengths) #convert to data frame
dim(transcripts) #33730 13
## [1] 33730 12
kegg <- read.delim("../../data/Pocillopora_acuta_HIv2.genes.KEGG_results.txt",header = FALSE)
kegg <- as.data.frame(kegg)
colnames(kegg)[1] <- "gene_id"
colnames(kegg)[2] <- "KEGG_new"
head(kegg)
## gene_id KEGG_new
## 1 Pocillopora_acuta_HIv2___RNAseq.g24143.t1a
## 2 Pocillopora_acuta_HIv2___RNAseq.g24143.t1b K22584
## 3 Pocillopora_acuta_HIv2___RNAseq.g22918.t1
## 4 Pocillopora_acuta_HIv2___RNAseq.g18333.t1 K03386
## 5 Pocillopora_acuta_HIv2___RNAseq.g7985.t1
## 6 Pocillopora_acuta_HIv2___RNAseq.g13511.t1
eggnog<-read.delim("../../data/Pocillopora_acuta_HIv2.genes.EggNog_results.txt")#this file contains all of the go terms, descriptions, kegg, etc
eggnog<- plyr::rename(eggnog, c("X.query"="gene_id"))
head(eggnog,2)
## gene_id seed_ortholog evalue score
## 1 Pocillopora_acuta_HIv2___RNAseq.g24143.t1a 45351.EDO48725 2.16e-120 364
## 2 Pocillopora_acuta_HIv2___RNAseq.g18333.t1 45351.EDO38694 3.18e-123 355
## eggNOG_OGs
## 1 COG0620@1|root,KOG2263@2759|Eukaryota,38GZS@33154|Opisthokonta,3BNKS@33208|Metazoa
## 2 COG0450@1|root,KOG0852@2759|Eukaryota,38B9P@33154|Opisthokonta,3BGS4@33208|Metazoa
## max_annot_lvl COG_category
## 1 33208|Metazoa E
## 2 33208|Metazoa O
## Description Preferred_name
## 1 Cobalamin-independent synthase, Catalytic domain -
## 2 negative regulation of male germ cell proliferation PRDX4
## GOs
## 1 -
## 2 GO:0000003,GO:0001775,GO:0002252,GO:0002263,GO:0002274,GO:0002275,GO:0002283,GO:0002366,GO:0002376,GO:0002443,GO:0002444,GO:0002446,GO:0003006,GO:0003674,GO:0003824,GO:0004601,GO:0005488,GO:0005515,GO:0005575,GO:0005576,GO:0005615,GO:0005622,GO:0005623,GO:0005737,GO:0005783,GO:0005790,GO:0005829,GO:0006082,GO:0006457,GO:0006464,GO:0006468,GO:0006520,GO:0006575,GO:0006793,GO:0006796,GO:0006807,GO:0006810,GO:0006887,GO:0006915,GO:0006950,GO:0006952,GO:0006955,GO:0006979,GO:0007154,GO:0007165,GO:0007249,GO:0007252,GO:0007275,GO:0007276,GO:0007283,GO:0007548,GO:0008150,GO:0008152,GO:0008219,GO:0008285,GO:0008379,GO:0008406,GO:0008584,GO:0009056,GO:0009266,GO:0009409,GO:0009605,GO:0009607,GO:0009617,GO:0009628,GO:0009636,GO:0009893,GO:0009966,GO:0009967,GO:0009987,GO:0010467,GO:0010604,GO:0010646,GO:0010647,GO:0010941,GO:0010942,GO:0010950,GO:0010952,GO:0012501,GO:0012505,GO:0016043,GO:0016192,GO:0016209,GO:0016310,GO:0016491,GO:0016684,GO:0016999,GO:0017001,GO:0017144,GO:0019222,GO:0019471,GO:0019538,GO:0019725,GO:0019752,GO:0019953,GO:0022414,GO:0022417,GO:0023051,GO:0023052,GO:0023056,GO:0030141,GO:0030162,GO:0030198,GO:0031323,GO:0031325,GO:0031410,GO:0031974,GO:0031982,GO:0031983,GO:0032268,GO:0032270,GO:0032501,GO:0032502,GO:0032504,GO:0032940,GO:0033554,GO:0034774,GO:0035556,GO:0036211,GO:0036230,GO:0042119,GO:0042127,GO:0042221,GO:0042592,GO:0042737,GO:0042742,GO:0042743,GO:0042744,GO:0042802,GO:0042803,GO:0042981,GO:0043062,GO:0043065,GO:0043067,GO:0043068,GO:0043085,GO:0043170,GO:0043207,GO:0043226,GO:0043227,GO:0043229,GO:0043231,GO:0043233,GO:0043280,GO:0043281,GO:0043299,GO:0043312,GO:0043412,GO:0043436,GO:0043900,GO:0043901,GO:0044093,GO:0044237,GO:0044238,GO:0044248,GO:0044260,GO:0044267,GO:0044281,GO:0044421,GO:0044422,GO:0044424,GO:0044433,GO:0044444,GO:0044446,GO:0044464,GO:0044703,GO:0045055,GO:0045137,GO:0045321,GO:0045454,GO:0045862,GO:0046425,GO:0046427,GO:0046546,GO:0046661,GO:0046903,GO:0046983,GO:0048232,GO:0048513,GO:0048518,GO:0048519,GO:0048522,GO:0048523,GO:0048583,GO:0048584,GO:0048608,GO:0048609,GO:0048731,GO:0048856,GO:0050789,GO:0050790,GO:0050794,GO:0050896,GO:0051171,GO:0051173,GO:0051179,GO:0051186,GO:0051187,GO:0051234,GO:0051239,GO:0051241,GO:0051246,GO:0051247,GO:0051336,GO:0051345,GO:0051604,GO:0051704,GO:0051707,GO:0051716,GO:0051920,GO:0052547,GO:0052548,GO:0055114,GO:0060205,GO:0060255,GO:0061458,GO:0065007,GO:0065008,GO:0065009,GO:0070013,GO:0070417,GO:0070887,GO:0071704,GO:0071840,GO:0072593,GO:0080090,GO:0097190,GO:0097237,GO:0097708,GO:0098542,GO:0098754,GO:0098869,GO:0099503,GO:0101002,GO:1901564,GO:1901605,GO:1902531,GO:1902533,GO:1904813,GO:1904892,GO:1904894,GO:1905936,GO:1905937,GO:1990748,GO:2000116,GO:2000241,GO:2000242,GO:2000254,GO:2000255,GO:2001056,GO:2001233,GO:2001235,GO:2001267,GO:2001269
## EC KEGG_ko
## 1 2.1.1.14 ko:K00549
## 2 1.11.1.15 ko:K03386
## KEGG_Pathway
## 1 ko00270,ko00450,ko01100,ko01110,ko01230,map00270,map00450,map01100,map01110,map01230
## 2 ko04214,map04214
## KEGG_Module KEGG_Reaction KEGG_rclass
## 1 M00017 R04405,R09365 RC00035,RC00113,RC01241
## 2 - - -
## BRITE KEGG_TC CAZy BiGG_Reaction
## 1 ko00000,ko00001,ko00002,ko01000 - - -
## 2 ko00000,ko00001,ko01000,ko04147 - - -
## PFAMs
## 1 Meth_synt_2
## 2 1-cysPrx_C,AhpC-TSA
gogene <- merge(transcripts, eggnog, by=c("gene_id"), all=T)
gogene <- merge(gogene, kegg, by=c("gene_id"), all=T)
head(gogene,2)
## gene_id seqnames
## 1 Pocillopora_acuta_HIv2___RNAseq.10002_t Pocillopora_acuta_HIv2___Sc0000013
## 2 Pocillopora_acuta_HIv2___RNAseq.10010_t Pocillopora_acuta_HIv2___Sc0000013
## start end width strand source type score.x phase
## 1 4542087 4551503 9417 + AUGUSTUS transcript NA NA
## 2 4639103 4647350 8248 + AUGUSTUS transcript NA NA
## ID
## 1 Pocillopora_acuta_HIv2___RNAseq.10002_t
## 2 Pocillopora_acuta_HIv2___RNAseq.10010_t
## transcript_id seed_ortholog evalue score.y
## 1 Pocillopora_acuta_HIv2___RNAseq.10002_t 45351.EDO27354 2.41e-93 317
## 2 Pocillopora_acuta_HIv2___RNAseq.10010_t 6087.XP_002166004.2 1.28e-38 164
## eggNOG_OGs
## 1 COG0666@1|root,KOG0510@2759|Eukaryota,38G7Q@33154|Opisthokonta,3BCDU@33208|Metazoa
## 2 COG0666@1|root,KOG4177@2759|Eukaryota
## max_annot_lvl COG_category Description
## 1 33208|Metazoa DZ osmolarity-sensing cation channel activity
## 2 2759|Eukaryota I spectrin binding
## Preferred_name
## 1 TRPA1
## 2 -
## GOs
## 1 GO:0000302,GO:0001580,GO:0002791,GO:0002793,GO:0003008,GO:0003012,GO:0003674,GO:0004888,GO:0005034,GO:0005215,GO:0005216,GO:0005217,GO:0005244,GO:0005245,GO:0005261,GO:0005262,GO:0005488,GO:0005515,GO:0005575,GO:0005623,GO:0005886,GO:0005887,GO:0006810,GO:0006811,GO:0006812,GO:0006816,GO:0006873,GO:0006874,GO:0006875,GO:0006936,GO:0006939,GO:0006950,GO:0006979,GO:0007154,GO:0007165,GO:0007166,GO:0007204,GO:0007600,GO:0007602,GO:0007606,GO:0007610,GO:0007638,GO:0008150,GO:0008324,GO:0009266,GO:0009314,GO:0009408,GO:0009409,GO:0009410,GO:0009416,GO:0009453,GO:0009581,GO:0009582,GO:0009583,GO:0009593,GO:0009605,GO:0009612,GO:0009628,GO:0009636,GO:0009719,GO:0009966,GO:0009967,GO:0009987,GO:0010033,GO:0010035,GO:0010037,GO:0010243,GO:0010378,GO:0010646,GO:0010647,GO:0010817,GO:0014070,GO:0014074,GO:0014832,GO:0014848,GO:0015075,GO:0015085,GO:0015267,GO:0015276,GO:0015278,GO:0015318,GO:0016020,GO:0016021,GO:0016043,GO:0016048,GO:0016324,GO:0019233,GO:0019722,GO:0019725,GO:0019932,GO:0022607,GO:0022803,GO:0022832,GO:0022834,GO:0022836,GO:0022838,GO:0022839,GO:0022843,GO:0022857,GO:0022890,GO:0023041,GO:0023051,GO:0023052,GO:0023056,GO:0030001,GO:0030003,GO:0030424,GO:0031000,GO:0031224,GO:0031226,GO:0031644,GO:0031646,GO:0032024,GO:0032421,GO:0032501,GO:0032879,GO:0032880,GO:0032991,GO:0033554,GO:0033555,GO:0034220,GO:0034605,GO:0034702,GO:0034703,GO:0035556,GO:0035690,GO:0035774,GO:0036270,GO:0038023,GO:0040011,GO:0040040,GO:0042221,GO:0042330,GO:0042331,GO:0042391,GO:0042493,GO:0042542,GO:0042592,GO:0042752,GO:0042802,GO:0042995,GO:0043005,GO:0043052,GO:0043269,GO:0043270,GO:0043279,GO:0043933,GO:0044057,GO:0044070,GO:0044085,GO:0044425,GO:0044459,GO:0044464,GO:0045177,GO:0046677,GO:0046873,GO:0046883,GO:0046887,GO:0046957,GO:0048265,GO:0048518,GO:0048519,GO:0048522,GO:0048523,GO:0048583,GO:0048584,GO:0048878,GO:0050708,GO:0050714,GO:0050789,GO:0050794,GO:0050796,GO:0050801,GO:0050848,GO:0050850,GO:0050877,GO:0050896,GO:0050906,GO:0050907,GO:0050909,GO:0050912,GO:0050913,GO:0050951,GO:0050954,GO:0050955,GO:0050960,GO:0050961,GO:0050965,GO:0050966,GO:0050968,GO:0050974,GO:0050982,GO:0051046,GO:0051047,GO:0051049,GO:0051050,GO:0051179,GO:0051209,GO:0051222,GO:0051223,GO:0051234,GO:0051239,GO:0051240,GO:0051259,GO:0051260,GO:0051262,GO:0051282,GO:0051283,GO:0051289,GO:0051480,GO:0051606,GO:0051641,GO:0051649,GO:0051716,GO:0051930,GO:0051931,GO:0051969,GO:0052129,GO:0055065,GO:0055074,GO:0055080,GO:0055082,GO:0055085,GO:0060089,GO:0060341,GO:0060401,GO:0060402,GO:0061178,GO:0065003,GO:0065007,GO:0065008,GO:0070201,GO:0070417,GO:0070588,GO:0070838,GO:0070887,GO:0071241,GO:0071244,GO:0071310,GO:0071312,GO:0071313,GO:0071407,GO:0071415,GO:0071417,GO:0071466,GO:0071495,GO:0071840,GO:0071944,GO:0072347,GO:0072503,GO:0072507,GO:0072511,GO:0090087,GO:0090276,GO:0090277,GO:0097458,GO:0097553,GO:0097603,GO:0097604,GO:0098590,GO:0098655,GO:0098660,GO:0098662,GO:0098771,GO:0098796,GO:0098862,GO:0098900,GO:0098908,GO:0099094,GO:0099604,GO:0120025,GO:1901698,GO:1901699,GO:1901700,GO:1901701,GO:1902495,GO:1902531,GO:1902533,GO:1903522,GO:1903530,GO:1903532,GO:1903793,GO:1904058,GO:1904951,GO:1990351,GO:1990760
## 2 -
## EC KEGG_ko KEGG_Pathway KEGG_Module KEGG_Reaction KEGG_rclass
## 1 - ko:K04984 ko04750,map04750 - - -
## 2 - - - - - -
## BRITE KEGG_TC CAZy
## 1 ko00000,ko00001,ko04040 1.A.4.6.1,1.A.4.6.2,1.A.4.6.3,1.A.4.6.5 -
## 2 - - -
## BiGG_Reaction PFAMs KEGG_new
## 1 - Ank,Ank_2,Ank_3,Ank_4,Ion_trans K04984
## 2 - Ank,Ank_2,Ank_4,Ank_5,Ion_trans K04984
dim(gogene)
## [1] 33730 33
geneInfo <- read.csv("../../output/WGCNA/WGCNA_ModuleMembership.csv") #this file was generated from the WGCNA analyses and contains the modules of interest
geneInfo<- plyr::rename(geneInfo, c("X"="gene_id"))
dim(geneInfo) # there are 9012 genes in our gene info file
## [1] 9012 40
geneInfo <- merge(gogene, geneInfo, by=c("gene_id")) #merging the GO and Kegg info to module membership for the 9012 genes
Format GO terms to remove dashes and quotes and separate by semicolons (replace , with ;) in GOs column
geneInfo$GOs <- gsub(",", ";", geneInfo$GOs)
geneInfo$GOs <- gsub('"', "", geneInfo$GOs)
geneInfo$GOs <- gsub("-", NA, geneInfo$GOs)
geneInfo$KEGG_new[geneInfo$KEGG_new == ""] <- NA
unique(geneInfo$moduleColor)
## [1] "green" "blue" "salmon" "turquoise" "yellow"
## [6] "black" "red" "magenta" "lightcyan" "purple"
## [11] "brown" "pink" "midnightblue" "tan" "cyan"
geneInfo$Length<-lengths$transcript_lengths[match(geneInfo$gene_id, lengths$seqnames)]
dim(geneInfo)
## [1] 9012 73
write.csv(geneInfo, file = "../../output/WGCNA/GO_analysis/geneInfo_WGCNA.csv") #gene info for reference/supplement
calc_up_mods <- c("brown", "red", "black", "pink", "salmon", "blue")
nrow(geneInfo %>% dplyr::filter(moduleColor=="brown")) #942
## [1] 942
nrow(geneInfo %>% dplyr::filter(moduleColor=="red")) #425
## [1] 425
nrow(geneInfo %>% filter(moduleColor=="black")) #396
## [1] 396
nrow(geneInfo %>% filter(moduleColor=="pink")) #220
## [1] 220
nrow(geneInfo %>% filter(moduleColor=="salmon")) #154
## [1] 154
nrow(geneInfo %>% filter(moduleColor=="blue")) #1989
## [1] 1989
sum(nrow(geneInfo %>% dplyr::filter(moduleColor=="brown")), nrow(geneInfo %>% dplyr::filter(moduleColor=="red")), nrow(geneInfo %>% filter(moduleColor=="black")), nrow(geneInfo %>% filter(moduleColor=="pink")), nrow(geneInfo %>% filter(moduleColor=="salmon")), nrow(geneInfo %>% filter(moduleColor=="blue")))
## [1] 4126
# 4126
calc_down_mods <- c("turquoise","magenta","lightcyan")
nrow(geneInfo %>% dplyr::filter(moduleColor=="turquoise")) #2558
## [1] 2558
nrow(geneInfo %>% dplyr::filter(moduleColor=="magenta")) #219
## [1] 219
nrow(geneInfo %>% filter(moduleColor=="lightcyan")) #65
## [1] 65
sum(nrow(geneInfo %>% dplyr::filter(moduleColor=="turquoise")), nrow(geneInfo %>% dplyr::filter(moduleColor=="magenta")), nrow(geneInfo %>% filter(moduleColor=="lightcyan")))
## [1] 2842
# 2842
other_mods <- c("green","yellow", "purple", "midnightblue","cyan","tan")
sum(nrow(geneInfo %>% dplyr::filter(moduleColor=="green")), nrow(geneInfo %>% dplyr::filter(moduleColor=="yellow")), nrow(geneInfo %>% filter(moduleColor=="purple")), nrow(geneInfo %>% filter(moduleColor=="midnightblue")), nrow(geneInfo %>% filter(moduleColor=="cyan")),nrow(geneInfo %>% filter(moduleColor=="tan")))
## [1] 2044
# 2044
# 4126 + 2842 + 2044 = 9012, which represents all of our genes
### Generate vector with names of all genes
ALL.vector <- c(geneInfo$gene_id)
### Generate length vector for all genes
LENGTH.vector <- as.integer(geneInfo$Length)
4126 genes are in the 6 modules significantly upregulated by calcification.
### Generate vector with names in just the module we are analyzing
# ID.vector <- geneInfo %>%
# filter(moduleColor==c("brown", "red", "black", "pink", "salmon", "green")) %>%
# #get_rows(.data[[module]]))%>%
# pull(gene_id)
ID.vector <- geneInfo %>%
filter(moduleColor %in% c("brown", "red", "black", "pink", "salmon", "blue")) %>%
pull(gene_id)
length(ID.vector) #4126
## [1] 4126
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")
dim(GO.terms) #9012 2
## [1] 9012 2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";")
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split))
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2
##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector)
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector)
## Warning in pcls(G): initial point very close to some inequality constraints
rrvgo package to
reduce redundancy in list of GO terms.#run goseq using Wallenius method for all categories of GO terms
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05 <- GO %>%
dplyr::filter(over_represented_pvalue<0.05) %>%
dplyr::arrange(., ontology, over_represented_pvalue)
#Write file of results
write.csv(GO_05, file = "../../output/WGCNA/GO_analysis/goseq_pattern_calcification.csv")
GO_05 <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification.csv")
go_results_BP <- GO_05 %>%
filter(ontology=="BP")%>%
filter(over_represented_pvalue != "NA") %>%
#filter(numInCat>10)%>%
arrange(., over_represented_pvalue)
dim(go_results_BP)
## [1] 654 8
head(go_results_BP)
## X GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 1 GO:0050794 8.436918e-08 1.0000000 1395
## 2 2 GO:0065007 1.553493e-06 0.9999989 1574
## 3 3 GO:0050789 1.584882e-06 0.9999989 1479
## 4 4 GO:0048523 1.581326e-05 0.9999878 800
## 5 5 GO:0048518 3.043369e-05 0.9999758 950
## 6 6 GO:0051336 3.403563e-05 0.9999783 232
## numInCat term ontology
## 1 2785 regulation of cellular process BP
## 2 3184 biological regulation BP
## 3 2981 regulation of biological process BP
## 4 1569 negative regulation of cellular process BP
## 5 1888 positive regulation of biological process BP
## 6 415 regulation of hydrolase activity BP
Collapsing with simplifyEnrichment package
library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm
Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
#compare_clustering_methods(Mat, plot_type = "heatmap")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_up_mods_GOSEM.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), max_words=20)
## Cluster 654 terms by 'binary_cut'... 184 clusters, used 1.835802 secs.
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 6 GO lists...
dev.off()
## quartz_off_screen
## 2
CalcUpMods_GO_P05 <- go_results_BP %>%
mutate(term = fct_reorder(term, over_represented_pvalue, .desc = TRUE)) %>%
ggplot(aes(x=term, y=over_represented_pvalue) ) +
geom_segment(aes(x=term ,xend=term, y=0, yend=over_represented_pvalue), color="grey") +
geom_point(size=1, color="#69b3a2") +
coord_flip() +
theme(
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
legend.position="none"
) +
xlab("") +
ylab("over_represented_pvalue") +
ggtitle("Biological Process GO Terms Enriched in Modules Upregulated in Calcification") + #add a main title
theme(plot.title = element_text(face = 'bold',
size = 3,
hjust = 0)) +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
axis.text.y = element_text(size = 2), #set y axis labels size
plot.background=element_blank(),#Set the plot background
legend.position="none")
CalcUpMods_GO_P05
ggsave("../../output/WGCNA/GO_analysis/CalcUpMods_GO_P05.pdf", CalcUpMods_GO_P05, width = 12, height = 36, units = c("in"), dpi=300, limitsize=FALSE)
library(rrvgo)
#Reduce/collapse GO term set with the rrvgo package
simMatrix <- calculateSimMatrix(go_results_BP$GOterm,
orgdb="org.Ce.eg.db", #c. elegans database
ont="BP",
method="Rel")
## preparing gene to GO mapping data...
## preparing IC data...
#calculate similarity
scores <- setNames(-log(go_results_BP$over_represented_pvalue), go_results_BP$GOterm)
reducedTerms <- reduceSimMatrix(simMatrix,
scores,
threshold=0.7,
orgdb="org.Ce.eg.db")
dim(reducedTerms)
## [1] 508 10
#keep only the goterms from the reduced list
go_results_BP_reduced <- go_results_BP %>%
filter(GOterm %in% reducedTerms$go)
#add in parent terms to list of go terms
go_results_BP_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_reduced$GOterm, reducedTerms$go)]
length(unique(go_results_BP_reduced$ParentTerm))
## [1] 73
The reduced list of terms is 508 terms that falls under 73 parent terms.
write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")
Plot reduced terms
#plot significantly enriched GO terms by Slim Category faceted by slim term
GO.plot_calcification <- ggplot(go_results_BP_reduced, aes(x = ontology, y = term)) +
geom_point(aes(size=over_represented_pvalue)) +
scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
strip.text.y = element_text(angle=0, size = 10),
strip.text.x = element_text(size = 20),
axis.text = element_text(size = 8),
axis.title.x = element_blank(),
axis.title.y = element_blank())
GO.plot_calcification
ggsave(filename="../../output/WGCNA/GO_analysis/GO.plot_calcification.pdf", plot=GO.plot_calcification, dpi=300, height=100, units="in", limitsize=FALSE)
## Saving 7 x 100 in image
CalcUpMods_GO_P05_reduced <- go_results_BP_reduced %>%
mutate(term = fct_reorder(term, over_represented_pvalue, .desc = TRUE)) %>%
ggplot(aes(x=term, y=over_represented_pvalue) ) +
geom_segment(aes(x=term ,xend=term, y=0, yend=over_represented_pvalue), color="grey") +
geom_point(size=1, color="#69b3a2") +
coord_flip() +
theme(
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
legend.position="none"
) +
xlab("") +
ylab("over_represented_pvalue") +
ggtitle("Biological Process GO Terms Enriched in Modules Upregulated in Calcification") + #add a main title
theme(plot.title = element_text(face = 'bold',
size = 3,
hjust = 0)) +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
axis.text.y = element_text(size = 2), #set y axis labels size
plot.background=element_blank(),#Set the plot background
legend.position="none")
CalcUpMods_GO_P05_reduced
ggsave("../../output/WGCNA/GO_analysis/CalcUpMods_GO_P05_reduced.pdf", CalcUpMods_GO_P05_reduced, width = 12, height = 36, units = c("in"), dpi=300, limitsize=FALSE)
### Generate vector with names of all genes
ALL.vector <- c(geneInfo$gene_id)
### Generate length vector for all genes
LENGTH.vector <- as.integer(geneInfo$Length)
ID.vector <- geneInfo %>%
filter(moduleColor %in% c("brown")) %>%
pull(gene_id)
length(ID.vector) #942
## [1] 942
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")
dim(GO.terms) #9012 2
## [1] 9012 2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";")
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split))
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2
##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector)
names(gene.vector)<-ALL.vector #set names
#weight gene vector by bias for length of gene
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector)
#run goseq using Wallenius method for all categories of GO terms
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_brown <- GO %>%
dplyr::filter(over_represented_pvalue<0.05) %>%
dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Brown")
go_results_BP <- GO_05_brown %>%
filter(ontology=="BP")%>%
filter(over_represented_pvalue != "NA") %>%
#filter(numInCat>10)%>%
arrange(., over_represented_pvalue)
dim(go_results_BP)
## [1] 206 8
Collapsing with simplifyEnrichment package
library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm
Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_up_mods_GOSEM_brown.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), min_term = 3)
## Cluster 206 terms by 'binary_cut'... 73 clusters, used 0.358979 secs.
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 11 GO lists...
dev.off()
## quartz_off_screen
## 2
ID.vector <- geneInfo %>%
filter(moduleColor %in% c("red")) %>%
pull(gene_id)
length(ID.vector) #425
## [1] 425
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")
dim(GO.terms) #9012 2
## [1] 9012 2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";")
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split))
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2
##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector)
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector)
#run goseq using Wallenius method for all categories of GO terms
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_red <- GO %>%
dplyr::filter(over_represented_pvalue<0.05) %>%
dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Red")
go_results_BP <- GO_05_red %>%
filter(ontology=="BP")%>%
filter(over_represented_pvalue != "NA") %>%
#filter(numInCat>10)%>%
arrange(., over_represented_pvalue)
dim(go_results_BP)
## [1] 209 8
#Write file of results
write.csv(GO_05_red, file = "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_red.csv")
Collapsing with simplifyEnrichment package
library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm
Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_up_mods_GOSEM_red.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), min_term = 3)
## Cluster 209 terms by 'binary_cut'... 147 clusters, used 0.9499679 secs.
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 9 GO lists...
dev.off()
## quartz_off_screen
## 2
ID.vector <- geneInfo %>%
filter(moduleColor %in% c("black")) %>%
pull(gene_id)
length(ID.vector) #396
## [1] 396
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")
dim(GO.terms) #9012 2
## [1] 9012 2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";")
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split))
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2
##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector)
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector)
#run goseq using Wallenius method for all categories of GO terms
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_black <- GO %>%
dplyr::filter(over_represented_pvalue<0.05) %>%
dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Black")
go_results_BP <- GO_05_black %>%
filter(ontology=="BP")%>%
filter(over_represented_pvalue != "NA") %>%
#filter(numInCat>10)%>%
arrange(., over_represented_pvalue)
dim(go_results_BP)
## [1] 395 8
Collapsing with simplifyEnrichment package
library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm
Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_up_mods_GOSEM_black.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), min_term = 3)
## Cluster 395 terms by 'binary_cut'... 281 clusters, used 3.392079 secs.
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 8 GO lists...
dev.off()
## quartz_off_screen
## 2
ID.vector <- geneInfo %>%
filter(moduleColor %in% c("pink")) %>%
pull(gene_id)
length(ID.vector) #220
## [1] 220
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")
dim(GO.terms) #9012 2
## [1] 9012 2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";")
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split))
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2
##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector)
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector)
#run goseq using Wallenius method for all categories of GO terms
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_pink <- GO %>%
dplyr::filter(over_represented_pvalue<0.05) %>%
dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Pink")
go_results_BP <- GO_05_pink %>%
filter(ontology=="BP")%>%
filter(over_represented_pvalue != "NA") %>%
#filter(numInCat>10)%>%
arrange(., over_represented_pvalue)
dim(go_results_BP)
## [1] 234 8
Collapsing with simplifyEnrichment package
library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm
Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_up_mods_GOSEM_pink.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), min_term = 3)
## Cluster 234 terms by 'binary_cut'... 125 clusters, used 0.522738 secs.
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 6 GO lists...
dev.off()
## quartz_off_screen
## 2
ID.vector <- geneInfo %>%
filter(moduleColor %in% c("salmon")) %>%
pull(gene_id)
length(ID.vector) #154
## [1] 154
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")
dim(GO.terms) #9012 2
## [1] 9012 2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";")
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split))
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2
##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector)
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector)
#run goseq using Wallenius method for all categories of GO terms
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_salmon <- GO %>%
dplyr::filter(over_represented_pvalue<0.05) %>%
dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Salmon")
go_results_BP <- GO_05_salmon %>%
filter(ontology=="BP")%>%
filter(over_represented_pvalue != "NA") %>%
#filter(numInCat>10)%>%
arrange(., over_represented_pvalue)
dim(go_results_BP)
## [1] 198 8
Collapsing with simplifyEnrichment package
library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm
Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_up_mods_GOSEM_salmon.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), min_term = 3)
## Cluster 198 terms by 'binary_cut'... 79 clusters, used 0.2580299 secs.
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 10 GO lists...
dev.off()
## quartz_off_screen
## 2
ID.vector <- geneInfo %>%
filter(moduleColor %in% c("blue")) %>%
pull(gene_id)
length(ID.vector) #1989
## [1] 1989
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")
dim(GO.terms) #9012 2
## [1] 9012 2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";")
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split))
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2
##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector)
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector)
## Warning in pcls(G): initial point very close to some inequality constraints
#run goseq using Wallenius method for all categories of GO terms
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_blue <- GO %>%
dplyr::filter(over_represented_pvalue<0.05) %>%
dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Blue")
go_results_BP <- GO_05_blue %>%
filter(ontology=="BP")%>%
filter(over_represented_pvalue != "NA") %>%
#filter(numInCat>10)%>%
arrange(., over_represented_pvalue)
dim(go_results_BP)
## [1] 1530 8
Collapsing with simplifyEnrichment package
library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm
Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_up_mods_GOSEM_blue.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50))
## Cluster 1530 terms by 'binary_cut'... 419 clusters, used 16.81366 secs.
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 7 GO lists...
dev.off()
## quartz_off_screen
## 2
GO_05_UP <- rbind(GO_05_brown,GO_05_red,GO_05_black,GO_05_pink,GO_05_salmon,GO_05_blue)
length(GO_05_UP$GOterm) #3810 enriched GO terms
## [1] 3810
length(unique(GO_05_UP$GOterm)) #3617 enriched GO terms
## [1] 3617
# Collapse GO terms that are duplicated accross modules - sort by GO term and p-value, then keep the row with the lowest p-value, collapsing the module names so you know which modules were represented
# Identify and collapse duplicates
GO_05_UP <- GO_05_UP %>%
arrange(GOterm, over_represented_pvalue) %>%
group_by(GOterm) %>% mutate(Module = paste(unique(Module), collapse = ";")) %>%
group_by(GOterm) %>%
slice_min(order_by = over_represented_pvalue, n = 1) %>%
ungroup()
length(GO_05_UP$GOterm) #3617 enriched GO terms
## [1] 3617
length(unique(GO_05_UP$GOterm)) #3617 enriched GO terms, all are unique
## [1] 3617
#Write file of results
write.csv(GO_05_UP, file = "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_up_bymod.csv")
2842 genes are in the 4 modules significantly downregulated by calcification.
### Generate vector with names of all genes
ALL.vector <- c(geneInfo$gene_id)
### Generate length vector for all genes
LENGTH.vector <- as.integer(geneInfo$Length)
### Generate vector with names in just the module we are analyzing
ID.vector <- geneInfo%>%
filter(moduleColor %in% c("turquoise","magenta","lightcyan"))%>%
pull(gene_id)
length(ID.vector) #2842
## [1] 2842
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")
dim(GO.terms) #9012 2
## [1] 9012 2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";")
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split))
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2
##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector)
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector)
### Run the GOSeq function by module color to test for GO term
enrichment. Due to high number of enriched GO terms, I am using a
p-value threshold of <0.05 and using
rrvgo package to
reduce redundancy in list of GO terms.
#run goseq using Wallenius method for all categories of GO terms
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05 <- GO %>%
dplyr::filter(over_represented_pvalue<0.05) %>%
dplyr::arrange(., ontology, over_represented_pvalue)
#Write file of results
write.csv(GO_05, file = "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down.csv")
GO_05 <-read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down.csv")
go_results_BP <- GO_05 %>%
filter(ontology=="BP") %>%
filter(over_represented_pvalue != "NA") %>%
#filter(numInCat > 10) %>%
arrange(., over_represented_pvalue)
dim(go_results_BP)
## [1] 298 8
head(go_results_BP)
## X GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 1 GO:0002181 8.295294e-15 1 67
## 2 2 GO:0006614 2.463649e-13 1 48
## 3 3 GO:0006412 3.096109e-13 1 123
## 4 4 GO:0043043 1.598313e-12 1 126
## 5 5 GO:0006613 2.273026e-12 1 49
## 6 6 GO:0006518 7.543937e-12 1 146
## numInCat term ontology
## 1 90 cytoplasmic translation BP
## 2 59 SRP-dependent cotranslational protein targeting to membrane BP
## 3 219 translation BP
## 4 231 peptide biosynthetic process BP
## 5 63 cotranslational protein targeting to membrane BP
## 6 285 peptide metabolic process BP
Collapsing with simplifyEnrichment package
library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm
Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_down_mods_GOSEM.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), max_words=20)
## Cluster 298 terms by 'binary_cut'... 87 clusters, used 0.564898 secs.
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 4 GO lists...
dev.off()
## quartz_off_screen
## 2
CalcDownMods_GO_P05 <- go_results_BP %>%
mutate(term = fct_reorder(term, over_represented_pvalue, .desc = TRUE)) %>%
ggplot(aes(x=term, y=over_represented_pvalue) ) +
geom_segment(aes(x=term ,xend=term, y=0, yend=over_represented_pvalue), color="grey") +
geom_point(size=1, color="#69b3a2") +
coord_flip() +
theme(
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
legend.position="none"
) +
xlab("") +
ylab("over_represented_pvalue") +
ggtitle("Biological Process GO Terms Enriched in Modules Downregulated in Calcification") + #add a main title
theme(plot.title = element_text(face = 'bold',
size = 3,
hjust = 0)) +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
axis.text.y = element_text(size = 2), #set y axis labels size
plot.background=element_blank(),#Set the plot background
legend.position="none")
CalcDownMods_GO_P05
ggsave("../../output/WGCNA/GO_analysis/CalcDownMods_GO_P05.pdf", CalcDownMods_GO_P05, width = 12, height = 36, units = c("in"), dpi=300, limitsize=FALSE)
library(rrvgo)
#Reduce/collapse GO term set with the rrvgo package
simMatrix <- calculateSimMatrix(go_results_BP$GOterm,
orgdb="org.Ce.eg.db", #c. elegans database
ont="BP",
method="Rel")
## preparing gene to GO mapping data...
## preparing IC data...
#calculate similarity
scores <- setNames(-log(go_results_BP$over_represented_pvalue), go_results_BP$GOterm)
reducedTerms <- reduceSimMatrix(simMatrix,
scores,
threshold=0.7,
orgdb="org.Ce.eg.db")
dim(reducedTerms)
## [1] 226 10
#keep only the goterms from the reduced list
go_results_BP_reduced <- go_results_BP %>%
filter(GOterm %in% reducedTerms$go)
#add in parent terms to list of go terms
go_results_BP_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_reduced$GOterm, reducedTerms$go)]
length(unique(go_results_BP_reduced$ParentTerm))
## [1] 38
The reduced list of terms is 226 terms that falls under 38 parent terms.
write.csv(go_results_BP_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")
#plot significantly enriched GO terms by Slim Category faceted by slim term
GO.plot_calcification_down <- ggplot(go_results_BP_reduced, aes(x = ontology, y = term)) +
geom_point(aes(size=over_represented_pvalue)) +
scale_size(name="Over rep. p-value", trans="reverse", range=c(1,3))+
facet_grid(ParentTerm ~ ., scales = "free", labeller = label_wrap_gen(width = 5, multi_line = TRUE))+
theme_bw() + theme(panel.border = element_blank(), panel.grid.major = element_blank(),
panel.grid.minor = element_blank(), axis.line = element_line(colour = "black"),
strip.text.y = element_text(angle=0, size = 10),
strip.text.x = element_text(size = 20),
axis.text = element_text(size = 8),
axis.title.x = element_blank(),
axis.title.y = element_blank())
GO.plot_calcification_down
ggsave(filename="../../output/WGCNA/GO_analysis/GO.plot_calcification_down.pdf", plot=GO.plot_calcification_down, dpi=300, height=100, units="in", limitsize=FALSE)
## Saving 7 x 100 in image
CalcDownMods_GO_P05_reduced <- go_results_BP_reduced %>%
mutate(term = fct_reorder(term, over_represented_pvalue, .desc = TRUE)) %>%
ggplot(aes(x=term, y=over_represented_pvalue) ) +
geom_segment(aes(x=term ,xend=term, y=0, yend=over_represented_pvalue), color="grey") +
geom_point(size=1, color="#69b3a2") +
coord_flip() +
theme(
panel.grid.minor.y = element_blank(),
panel.grid.major.y = element_blank(),
legend.position="none"
) +
xlab("") +
ylab("over_represented_pvalue") +
ggtitle("Biological Process GO Terms Enriched in Modules Downregulated in Calcification") + #add a main title
theme(plot.title = element_text(face = 'bold',
size = 3,
hjust = 0)) +
theme_bw() + #Set background color
theme(panel.border = element_blank(), # Set border
panel.grid.major = element_blank(), #Set major gridlines
panel.grid.minor = element_blank(), #Set minor gridlines
axis.line = element_line(colour = "black"), #Set axes color
axis.text.y = element_text(size = 2), #set y axis labels size
plot.background=element_blank(),#Set the plot background
legend.position="none")
CalcDownMods_GO_P05_reduced
ggsave("../../output/WGCNA/GO_analysis/CalcDownMods_GO_P05_reduced.pdf", CalcDownMods_GO_P05_reduced, width = 12, height = 36, units = c("in"), dpi=300, limitsize=FALSE)
### Generate vector with names of all genes
ALL.vector <- c(geneInfo$gene_id)
### Generate length vector for all genes
LENGTH.vector <- as.integer(geneInfo$Length)
ID.vector <- geneInfo %>%
filter(moduleColor %in% c("turquoise")) %>%
pull(gene_id)
length(ID.vector) #2558
## [1] 2558
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")
dim(GO.terms) #9012 2
## [1] 9012 2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";")
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split))
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2
##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector)
names(gene.vector)<-ALL.vector #set names
#weight gene vector by bias for length of gene
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector)
#run goseq using Wallenius method for all categories of GO terms
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_turquoise <- GO %>%
dplyr::filter(over_represented_pvalue<0.05) %>%
dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Turquoise")
go_results_BP <- GO_05_turquoise %>%
filter(ontology=="BP")%>%
filter(over_represented_pvalue != "NA") %>%
#filter(numInCat>10)%>%
arrange(., over_represented_pvalue)
dim(go_results_BP)
## [1] 382 8
Collapsing with simplifyEnrichment package
library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm
Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_down_mods_GOSEM_turquoise.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), min_term = 3)
## Cluster 382 terms by 'binary_cut'... 115 clusters, used 0.902597 secs.
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 8 GO lists...
dev.off()
## quartz_off_screen
## 2
ID.vector <- geneInfo %>%
filter(moduleColor %in% c("magenta")) %>%
pull(gene_id)
length(ID.vector) #219
## [1] 219
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")
dim(GO.terms) #9012 2
## [1] 9012 2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";")
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split))
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2
##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector)
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector)
## Warning in pcls(G): initial point very close to some inequality constraints
#run goseq using Wallenius method for all categories of GO terms
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_magenta <- GO %>%
dplyr::filter(over_represented_pvalue<0.05) %>%
dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Magenta")
go_results_BP <- GO_05_magenta %>%
filter(ontology=="BP")%>%
filter(over_represented_pvalue != "NA") %>%
#filter(numInCat>10)%>%
arrange(., over_represented_pvalue)
dim(go_results_BP)
## [1] 210 8
Collapsing with simplifyEnrichment package
library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm
Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_down_mods_GOSEM_magenta.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), min_term = 3)
## Cluster 210 terms by 'binary_cut'... 147 clusters, used 0.779767 secs.
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 7 GO lists...
dev.off()
## quartz_off_screen
## 2
ID.vector <- geneInfo %>%
filter(moduleColor %in% c("lightcyan")) %>%
pull(gene_id)
length(ID.vector) #65
## [1] 65
##Get a list of GO Terms for the all 9012 genes
GO.terms <- geneInfo %>%
dplyr::select(gene_id, GOs) %>% rename(GOs = "GO.terms")
dim(GO.terms) #9012 2
## [1] 9012 2
##Format to have one goterm per row with gene ID repeated
split <- strsplit(as.character(GO.terms$GO.terms), ";")
split2 <- data.frame(v1 = rep.int(GO.terms$gene, sapply(split, length)), v2 = unlist(split))
colnames(split2) <- c("gene", "GO.terms")
GO.terms<-split2
##Construct list of genes with 1 for genes in module and 0 for genes not in the module
gene.vector=as.integer(ALL.vector %in% ID.vector)
names(gene.vector)<-ALL.vector#set names
#weight gene vector by bias for length of gene
pwf<-nullp(gene.vector, ID.vector, bias.data=LENGTH.vector)
## Warning in pcls(G): initial point very close to some inequality constraints
#run goseq using Wallenius method for all categories of GO terms
GO.wall<-goseq(pwf, ID.vector, gene2cat=GO.terms, test.cats=c("GO:BP", "GO:MF", "GO:CC"), method="Wallenius", use_genes_without_cat=TRUE)
## Using manually entered categories.
## Calculating the p-values...
## 'select()' returned 1:1 mapping between keys and columns
GO <- GO.wall[order(GO.wall$over_represented_pvalue),]
colnames(GO)[1] <- "GOterm"
#Filtering for p < 0.05
GO_05_lightcyan <- GO %>%
dplyr::filter(over_represented_pvalue<0.05) %>%
dplyr::arrange(., ontology, over_represented_pvalue) %>% mutate(Module = "Lightcyan")
go_results_BP <- GO_05_lightcyan %>%
filter(ontology=="BP")%>%
filter(over_represented_pvalue != "NA") %>%
#filter(numInCat>10)%>%
arrange(., over_represented_pvalue)
dim(go_results_BP)
## [1] 129 8
Collapsing with simplifyEnrichment package
library(simplifyEnrichment)
BP_terms <- go_results_BP$GOterm
Mat <- GO_similarity(BP_terms, ont = "BP", db = "org.Ce.eg.db")
pdf(file = "../../output/WGCNA/GO_analysis/semantic_similarity/calc_down_mods_GOSEM_lightcyan.pdf", width = 7, height = 5)
simplifyGO(Mat, word_cloud_grob_param = list(max_width = 50), min_term = 3)
## Cluster 129 terms by 'binary_cut'... 82 clusters, used 0.257122 secs.
## 'magick' package is suggested to install to give better rasterization.
##
## Set `ht_opt$message = FALSE` to turn off this message.
## Perform keywords enrichment for 8 GO lists...
dev.off()
## quartz_off_screen
## 2
GO_05_DOWN <- rbind(GO_05_turquoise,GO_05_magenta,GO_05_lightcyan)
length(GO_05_DOWN$GOterm) #1072 enriched GO terms
## [1] 1072
length(unique(GO_05_DOWN$GOterm)) #1070 enriched GO terms
## [1] 1070
# Collapse GO terms that are duplicated accross modules - sort by GO term and p-value, then keep the row with the lowest p-value, collapsing the module names so you know which modules were represented
# Identify and collapse duplicates
GO_05_DOWN <- GO_05_DOWN %>%
arrange(GOterm, over_represented_pvalue) %>%
group_by(GOterm) %>% mutate(Module = paste(unique(Module), collapse = ";")) %>%
group_by(GOterm) %>%
slice_min(order_by = over_represented_pvalue, n = 1) %>%
ungroup()
#Write file of results
write.csv(GO_05_DOWN, file = "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_bymod.csv")
# GO_05_up <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification.csv") %>% dplyr::select(-"X")
# GO_05_down <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down.csv") %>% dplyr::select(-"X")
GO_05_up <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_up_bymod.csv") %>% dplyr::select(-"X")
GO_05_down <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_bymod.csv") %>% dplyr::select(-"X")
go_results_BP_up <- GO_05_up %>%
filter(ontology=="BP")%>%
filter(over_represented_pvalue != "NA") %>%
#filter(numInCat>10)%>%
arrange(., over_represented_pvalue)
dim(go_results_BP_up)
## [1] 2647 8
go_results_BP_down <- GO_05_down %>%
filter(ontology=="BP")%>%
filter(over_represented_pvalue != "NA") %>%
#filter(numInCat>10)%>%
arrange(., over_represented_pvalue)
dim(go_results_BP_down)
## [1] 721 8
all_enriched_terms_up_down <- c(go_results_BP_up$GOterm,go_results_BP_down$GOterm)
length(all_enriched_terms_up_down)
## [1] 3368
length(unique(all_enriched_terms_up_down))
## [1] 3217
By reducing the lists together, we can ensure that each GO term is given the same parent term whether the term was in the Up or Down modules.
library(rrvgo)
#Reduce/collapse GO term set with the rrvgo package
simMatrix <- calculateSimMatrix(unique(all_enriched_terms_up_down),
orgdb="org.Ce.eg.db", #c. elegans database
ont="BP",
method="Rel")
## preparing gene to GO mapping data...
## preparing IC data...
#calculate similarity
reducedTerms <- reduceSimMatrix(simMatrix,
scores = "size",
threshold=0.7,
orgdb="org.Ce.eg.db")
## No scores provided. Falling back to term's GO size
dim(reducedTerms)
## [1] 2004 10
#keep only the goterms from the reduced list
go_results_BP_up_reduced <- go_results_BP_up %>%
filter(GOterm %in% reducedTerms$go)
#add in parent terms to list of go terms
go_results_BP_up_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_up_reduced$GOterm, reducedTerms$go)]
length(unique(go_results_BP_up_reduced$ParentTerm))
## [1] 148
#keep only the goterms from the reduced list
go_results_BP_down_reduced <- go_results_BP_down %>%
filter(GOterm %in% reducedTerms$go)
#add in parent terms to list of go terms
go_results_BP_down_reduced$ParentTerm <- reducedTerms$parentTerm[match(go_results_BP_down_reduced$GOterm, reducedTerms$go)]
length(unique(go_results_BP_down_reduced$ParentTerm))
## [1] 85
#write.csv(go_results_BP_up_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")
#write.csv(go_results_BP_down_reduced, "../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")
The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P < 0.05) within the modules that were upregulated for calcification. The list has been further reduced by using the package rrvgo.
#cal_up_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_filtered.csv")
cal_up_terms <- go_results_BP_up_reduced
cal_up_terms <- cal_up_terms %>% mutate(Factor = "Up")
head(cal_up_terms)
## GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0050794 7.575558e-23 1 823
## 2 GO:0065007 9.035173e-22 1 912
## 3 GO:0050789 1.200453e-20 1 859
## 4 GO:0048522 2.736399e-18 1 531
## 5 GO:0048518 2.733516e-17 1 574
## 6 GO:0009987 1.373221e-16 1 1074
## numInCat term ontology Module
## 1 2785 regulation of cellular process BP Blue
## 2 3184 biological regulation BP Blue
## 3 2981 regulation of biological process BP Blue
## 4 1705 positive regulation of cellular process BP Blue
## 5 1888 positive regulation of biological process BP Blue
## 6 4026 cellular process BP Blue
## ParentTerm Factor
## 1 regulation of cellular process Up
## 2 regulation of cellular process Up
## 3 regulation of cellular process Up
## 4 positive regulation of transcription by RNA polymerase II Up
## 5 positive regulation of transcription by RNA polymerase II Up
## 6 cellular process Up
The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P < 0.05) within the modules that were downregulated for calcification. The list has been further reduced by using the package rrvgo.
#cal_down_terms <- read.csv("../../output/WGCNA/GO_analysis/goseq_pattern_calcification_down_filtered.csv")
cal_down_terms <- go_results_BP_down_reduced
cal_down_terms <- cal_down_terms %>% mutate(Factor = "Down")
head(cal_down_terms)
## GOterm over_represented_pvalue under_represented_pvalue numDEInCat
## 1 GO:0002181 1.561228e-16 1 67
## 2 GO:0006412 2.328139e-16 1 123
## 3 GO:0043043 3.132084e-15 1 125
## 4 GO:0006518 5.575430e-15 1 145
## 5 GO:0006614 1.285770e-14 1 48
## 6 GO:0043603 2.192130e-14 1 170
## numInCat term ontology
## 1 90 cytoplasmic translation BP
## 2 219 translation BP
## 3 231 peptide biosynthetic process BP
## 4 285 peptide metabolic process BP
## 5 59 SRP-dependent cotranslational protein targeting to membrane BP
## 6 359 amide metabolic process BP
## Module ParentTerm Factor
## 1 Turquoise translation Down
## 2 Turquoise translation Down
## 3 Turquoise translation Down
## 4 Turquoise translation Down
## 5 Turquoise protein transport Down
## 6 Turquoise translation Down
The following list is a list of BP-ontology (“Biological Process”) GO terms that were significantly enriched (P < 0.05) within the modules that were upregulated or downregulated for calcification. The list has been further reduced by using the package rrvgo.
all_terms <- rbind(cal_down_terms,cal_up_terms)
all_terms <- all_terms[,c("Factor","GOterm","over_represented_pvalue","under_represented_pvalue","numDEInCat","numInCat","term","ontology","ParentTerm","Module")]
all_terms$GOterm<-as.factor(all_terms$GOterm)
dim(all_terms) #2140 reduced terms
## [1] 2140 10
length(unique(all_terms$GOterm)) #this represents 2004 unique terms between the two lists of reduced terms
## [1] 2004
length(unique(all_terms$ParentTerm)) #this represents 153 unique terms between the two lists of reduced terms
## [1] 153
This is collapsing the list from 2236 rows to 2004, representing the 2004 unique GO terms in the list above. It collapses the information in the columns “ParentTerm” and “Factor” so that for each GO term we get the parent terms associated and if this was enriched in modules upregulated for calcification, downregulated for calcification, or both.
goterms_shared <- all_terms %>%
group_by(GOterm) %>%
dplyr::summarise(
ParentTerm = paste(unique(ParentTerm), collapse = ", "),
Factor = paste(unique(Factor), collapse = ", "),
term = paste(unique(term), collapse = ", ")
)
dim(goterms_shared)
## [1] 2004 4
goterms_shared_full <- goterms_shared %>%
left_join(dplyr::select(all_terms,GOterm, Factor, over_represented_pvalue), by = "GOterm") %>% #select 3 columns GOterm, Factor, over_represented_pvalue from all_terms and left join by GOterm, turns this dataframe from 2004 rows back to the 2236 in all_terms
mutate(pvalue_Up = ifelse(Factor.y == "Up", over_represented_pvalue, NA)) %>% #add a column that is the p-value for the Up factor
mutate(pvalue_Down = ifelse(Factor.y == "Down", over_represented_pvalue, NA)) %>% #add a column that is the p-value for the Down factor
dplyr::select(-c("over_represented_pvalue", "Factor.y")) %>% #remove the unique columns so that we can collapse the dataframe
group_by(GOterm, ParentTerm, Factor.x, term) %>% #group by the repeated columns for the non-unique GO terms
dplyr::summarize(pvalue_Up = na.omit(pvalue_Up)[1], #carry over the p-value for the term in the up direction, by taking the first non-NA value.
pvalue_Down = na.omit(pvalue_Down)[1]) %>% #carry over the p-value for the term in the down direction, by taking the first non-NA value.
rename(Factor.x = "Factor") #rename column
## `summarise()` has grouped output by 'GOterm', 'ParentTerm', 'Factor.x'. You can
## override using the `.groups` argument.
write.csv(goterms_shared_full, "../../output/WGCNA/GO_analysis/Merged_GOterms_factor_ParentTerm.csv")
goterms_shared_full <- goterms_shared_full %>% group_by(ParentTerm) %>% mutate("N_in_Parent" = n()) %>% ungroup()
goterms_up <- goterms_shared_full %>% dplyr::filter(Factor=="Up")
goterms_down <- goterms_shared_full %>% dplyr::filter(Factor=="Down")
goterms_shared_only <- goterms_shared_full %>% dplyr::filter(Factor=="Down, Up")
nrow(goterms_up)
## [1] 1571
nrow(goterms_down)
## [1] 297
nrow(goterms_shared_only)
## [1] 136
nrow(goterms_up) + nrow(goterms_down) + nrow(goterms_shared_only) == nrow(goterms_shared_full)
## [1] TRUE
#how many parent terms in each
length(unique(goterms_up$ParentTerm))
## [1] 145
length(unique(goterms_down$ParentTerm))
## [1] 79
length(unique(goterms_shared_only$ParentTerm))
## [1] 42
freq_fig_up_pval <- ggplot(goterms_shared_only, aes(x = reorder(ParentTerm, -N_in_Parent), y = pvalue_Up)) +
geom_segment(aes(x = ParentTerm, xend = ParentTerm, y = 0, yend = pvalue_Up,),linewidth=.25) +
geom_point(size=1.5, alpha=1, colour = "red") +
geom_hline(yintercept = 0, linetype = "solid", color = "grey", size = 0.5, show.legend = TRUE) +
coord_flip() +
theme_classic() +
ylim(0, 0.05) +
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=5),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
axis.text.y = element_text(vjust=0.5, size=5),
axis.title.x = element_blank(),
axis.title.y = element_blank()) +
guides(color = FALSE)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
freq_fig_down_pval <- ggplot(goterms_shared_only, aes(x = reorder(ParentTerm, -N_in_Parent), y = pvalue_Down)) +
geom_segment(aes(x = ParentTerm, xend = ParentTerm, y = 0, yend = pvalue_Down),linewidth=.25) +
geom_point(size=1.5, alpha=1, colour = "blue") +
geom_hline(yintercept = 0, linetype = "solid", color = "grey", size = 0.5, show.legend = TRUE) +
coord_flip() +
theme_classic() +
ylim(0, 0.05) +
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=5),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
axis.text.y = element_text(vjust=0.5, size=5),
axis.title.x = element_blank(),
axis.title.y = element_blank())
compare_figs<-cowplot::plot_grid(freq_fig_up_pval, freq_fig_down_pval, nrow=1, align="v")
compare_figs
ggsave(filename="../../output/WGCNA/GO_analysis/Up_Down_Shared_ParentTerms.pdf", plot=compare_figs, dpi=300, height=3, units="in", limitsize=FALSE)
## Saving 7 x 3 in image
freq_fig_up_pval <- ggplot(goterms_up, aes(x = reorder(ParentTerm, -N_in_Parent), y = pvalue_Up)) +
geom_segment(aes(x = ParentTerm, xend = ParentTerm, y = 0, yend = pvalue_Up,),linewidth=.25) +
geom_point(size=1.5, alpha=1, colour = "red") +
geom_hline(yintercept = 0, linetype = "solid", color = "grey", size = 0.5, show.legend = TRUE) +
coord_flip() +
theme_classic() +
ylim(0, 0.05) +
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=5),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
axis.text.y = element_text(vjust=0.5, size=5),
axis.title.x = element_blank(),
axis.title.y = element_blank()) +
guides(color = FALSE)
freq_fig_down_pval <- ggplot(goterms_down, aes(x = reorder(ParentTerm, -N_in_Parent), y = pvalue_Down)) +
geom_segment(aes(x = ParentTerm, xend = ParentTerm, y = 0, yend = pvalue_Down),linewidth=.25) +
geom_point(size=1.5, alpha=1, colour = "blue") +
geom_hline(yintercept = 0, linetype = "solid", color = "grey", size = 0.5, show.legend = TRUE) +
coord_flip() +
theme_classic() +
ylim(0, 0.05) +
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=5),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
axis.text.y = element_text(vjust=0.5, size=5),
axis.title.x = element_blank(),
axis.title.y = element_blank())
compare_figs<-cowplot::plot_grid(freq_fig_up_pval, freq_fig_down_pval, nrow=1, align="v")
compare_figs
ggsave(filename="../../output/WGCNA/GO_analysis/Up_Down_Unique_ParentTerms.pdf", plot=compare_figs, dpi=300, height=10, units="in", limitsize=FALSE)
## Saving 7 x 10 in image
I downloaded the GOSlim terms list from the following website on 4/5/2024: http://www.informatics.jax.org/gotools/data/input/map2MGIslim.txt
I did so by going to this link and right clicking “here” at “Tab-delimited file mapping GO_ids to MGI GO_Slim category available for download here” and saying “Save link as…” , and I saved it as “map2MGIslim.txt”. I then converted this to CSV and uploaded to this github repository here.
Add slim terms to dataset
GOslim <- read.csv("../../data/map2MGIslim.csv") %>% dplyr::select(-c(term,aspect))
goterms_shared_full_slim <- goterms_shared_full %>%
inner_join(GOslim, by = c("GOterm" = "GO_id"))
goterms_up_slim <- goterms_shared_full_slim %>% dplyr::filter(Factor=="Up" | Factor=="Down, Up")
goterms_up_full_slim_plot <- goterms_up_slim %>%
ggplot(aes(x=pvalue_Up,
y=term)) +
geom_point(size = 0.5) +
expand_limits(x=0) +
facet_grid(GOSlim_bin ~ ., scales = "free", space='free', labeller = label_wrap_gen(width = 55, multi_line = TRUE)) +
labs(x="Over-represented p-value", y="") +
theme_bw() +
theme(
strip.text.y = element_text(size = 5, colour = "black", angle = 0, face = "bold"),
axis.text.y = element_text(size = 1, colour = "black"),
axis.title.x = element_text(size = 5, colour = "black"),
); goterms_up_full_slim_plot
ggsave("../../output/WGCNA/GO_analysis/Calc_Up_GOSlim.pdf", goterms_up_full_slim_plot, width = 5, height = 35)
goterms_up_full_slim_plot <- goterms_up_slim %>%
ggplot(aes(x=log10(pvalue_Up),
y=term)) +
geom_point(size = 0.5) +
expand_limits(x=0) +
facet_grid(GOSlim_bin ~ ., scales = "free", space='free', labeller = label_wrap_gen(width = 55, multi_line = TRUE)) +
labs(x="Log 10(Over-represented p-value)", y="") +
theme_bw() +
theme(
strip.text.y = element_text(size = 5, colour = "black", angle = 0, face = "bold"),
axis.text.y = element_text(size = 1, colour = "black"),
axis.title.x = element_text(size = 5, colour = "black"),
); goterms_up_full_slim_plot
ggsave("../../output/WGCNA/GO_analysis/Calc_Up_GOSlim_Log.pdf", goterms_up_full_slim_plot, width = 5, height = 35)
goterms_down_slim <- goterms_shared_full_slim %>% dplyr::filter(Factor=="Down" | Factor=="Down, Up")
goterms_down_full_slim_plot <- goterms_down_slim %>%
ggplot(aes(x=pvalue_Down,
y=term)) +
geom_point(size = 0.5) +
expand_limits(x=0) +
facet_grid(GOSlim_bin ~ ., scales = "free", space='free', labeller = label_wrap_gen(width = 55, multi_line = TRUE)) +
labs(x="Over-represented p-value", y="") +
theme_bw() +
theme(
strip.text.y = element_text(size = 5, colour = "black", angle = 0, face = "bold"),
axis.text = element_text(size = 1, colour = "black"),
axis.title.x = element_text(size = 5, colour = "black"),
legend.text=element_text(size = 5),
legend.title = element_text(size = 5)
); goterms_down_full_slim_plot
ggsave("../../output/WGCNA/GO_analysis/Calc_Down_GOSlim.pdf", goterms_down_full_slim_plot, width = 5, height = 35)
goterms_down_full_slim_plot <- goterms_down_slim %>%
ggplot(aes(x=log10(pvalue_Down),
y=term)) +
geom_point(size = 0.5) +
expand_limits(x=0) +
facet_grid(GOSlim_bin ~ ., scales = "free", space='free', labeller = label_wrap_gen(width = 55, multi_line = TRUE)) +
labs(x="Log 10(Over-represented p-value)", y="") +
theme_bw() +
theme(
strip.text.y = element_text(size = 5, colour = "black", angle = 0, face = "bold"),
axis.text = element_text(size = 1, colour = "black"),
axis.title.x = element_text(size = 5, colour = "black"),
legend.text=element_text(size = 5),
legend.title = element_text(size = 5)
); goterms_down_full_slim_plot
ggsave("../../output/WGCNA/GO_analysis/Calc_Down_GOSlim_Log.pdf", goterms_down_full_slim_plot, width = 5, height = 35)
SharedGOterms = # of GO terms within the parent term that are in the list for “Down”, “Up”, or “Down, Up”
result_parent_unique <- goterms_shared %>%
group_by(ParentTerm,Factor) %>%
dplyr::summarise(SharedGOterms = n_distinct(GOterm)) %>%
arrange(-SharedGOterms)
## `summarise()` has grouped output by 'ParentTerm'. You can override using the
## `.groups` argument.
parent_filtered_up <- result_parent_unique %>%
dplyr::filter(Factor=="Up" | Factor=="Down, Up")
#dplyr::filter(SharedGOterms>=5)
parent_filtered_down <- result_parent_unique %>%
dplyr::filter(Factor=="Down" | Factor=="Down, Up")
#dplyr::filter(SharedGOterms>=5)
result_up <- cal_up_terms %>%
dplyr::group_by(ParentTerm) %>%
dplyr::summarize(Number.of.terms = n_distinct(term))%>%
mutate(Calcification.direction = "Up")
head(result_up)
## # A tibble: 6 × 3
## ParentTerm Number.of.terms Calcification.direct…¹
## <chr> <int> <chr>
## 1 Arp2/3 complex-mediated actin nucleati… 13 Up
## 2 B cell differentiation 8 Up
## 3 BMP signaling pathway 19 Up
## 4 DNA topological change 7 Up
## 5 G protein-coupled receptor signaling p… 3 Up
## 6 Golgi organization 7 Up
## # ℹ abbreviated name: ¹Calcification.direction
dim(result_up)
## [1] 148 3
merged_up <- parent_filtered_up %>%
tidyr::separate_rows(ParentTerm, sep = ", ") %>%
dplyr::group_by(ParentTerm) %>%
dplyr::summarize(Sum_of_SharedGOterms = sum(SharedGOterms, na.rm = TRUE)) %>%
left_join(result_up, by = "ParentTerm") #join with result from above to get the Number.of.Terms column
merged_up_clean <- na.omit(merged_up)
head(merged_up_clean)
## # A tibble: 6 × 4
## ParentTerm Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
## <chr> <int> <int> <chr>
## 1 Arp2/3 complex-me… 13 13 Up
## 2 B cell differenti… 8 8 Up
## 3 BMP signaling pat… 19 19 Up
## 4 DNA topological c… 7 7 Up
## 5 G protein-coupled… 3 3 Up
## 6 Golgi organization 7 7 Up
## # ℹ abbreviated name: ¹Calcification.direction
dim(merged_up_clean)
## [1] 147 4
result_down <- cal_down_terms %>%
dplyr::group_by(ParentTerm) %>%
dplyr::summarize(Number.of.terms = n_distinct(term))%>%
mutate(Calcification.direction = "Down")
dim(result_down)
## [1] 85 3
head(result_down)
## # A tibble: 6 × 3
## ParentTerm Number.of.terms Calcification.direct…¹
## <chr> <int> <chr>
## 1 Arp2/3 complex-mediated actin nucleati… 2 Down
## 2 DNA topological change 2 Down
## 3 G protein-coupled receptor signaling p… 2 Down
## 4 IRE1-mediated unfolded protein response 5 Down
## 5 NAD metabolic process 6 Down
## 6 NADH regeneration 1 Down
## # ℹ abbreviated name: ¹Calcification.direction
merged_down <- parent_filtered_down %>%
tidyr::separate_rows(ParentTerm, sep = ", ") %>%
dplyr::group_by(ParentTerm) %>%
dplyr::summarize(Sum_of_SharedGOterms = sum(SharedGOterms, na.rm = TRUE)) %>%
left_join(result_down, by = "ParentTerm")
merged_down_clean<- na.omit(merged_down)
head(merged_down_clean)
## # A tibble: 6 × 4
## ParentTerm Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
## <chr> <int> <int> <chr>
## 1 Arp2/3 complex-me… 2 2 Down
## 2 DNA topological c… 2 2 Down
## 3 G protein-coupled… 2 2 Down
## 4 IRE1-mediated unf… 5 5 Down
## 5 NAD metabolic pro… 6 6 Down
## 6 NADH regeneration 1 1 Down
## # ℹ abbreviated name: ¹Calcification.direction
cal_freq_terms_filtered_up <- merged_up_clean %>%
filter(Number.of.terms>=10) %>%
filter(Calcification.direction=="Up")
dim(cal_freq_terms_filtered_up)
## [1] 68 4
#counts$Direction.of.flat.origin<- factor(counts$Direction.of.flat.origin, levels =c("up","no pattern","down"))
#counts$Module<- factor(counts$Module, levels=c("Blue","Brown","Greenyellow","Cyan","Pink","Magenta","Lightcyan","Midnight blue","Purple","Turquiose","Red","Black"))
freq_fig_up<-ggplot(cal_freq_terms_filtered_up, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms), group=1))+
#facet_wrap(~Calcification.direction, nrow = 1)+
geom_point(size=5, alpha=1, pch=21,color="black")+
geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
coord_flip()+
scale_y_continuous(expression(GO~term~counts),limits=c(0,60))+
#scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
#scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+
#scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+
theme_classic()+
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
legend.title = element_blank(),
axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger
axis.title.x = element_blank(),#making the axis title larger
axis.title.y = element_blank(),
strip.text = element_text(size=12))#making the axis title larger
freq_fig_up
ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_up.pdf", plot=freq_fig_up, dpi=300, height=12, units="in", limitsize=FALSE)
## Saving 7 x 12 in image
cal_freq_terms_filtered_down <- merged_down_clean %>%
filter(Number.of.terms>=10)
#filter(Calcification.direction=="Down")
cal_freq_terms_filtered_down
## # A tibble: 12 × 4
## ParentTerm Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
## <chr> <int> <int> <chr>
## 1 fatty acid metab… 12 12 Down
## 2 intracellular si… 12 12 Down
## 3 metabolic process 15 15 Down
## 4 negative regulat… 12 12 Down
## 5 peptidyl-serine … 16 16 Down
## 6 protein transport 16 16 Down
## 7 proton motive fo… 26 26 Down
## 8 regulation of pr… 13 13 Down
## 9 regulation of tr… 17 17 Down
## 10 reproduction 13 13 Down
## 11 tricarboxylic ac… 11 11 Down
## 12 ubiquitin-depend… 26 26 Down
## # ℹ abbreviated name: ¹Calcification.direction
freq_fig_down<-ggplot(cal_freq_terms_filtered_down, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms)))+
#facet_wrap(~Calcification.direction, nrow = 1)+
geom_point(size=5, alpha=1, pch=21,color="black")+
geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
#geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
coord_flip()+
scale_y_continuous(expression(GO~term~counts),limits=c(0,27))+
#scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
#scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
scale_fill_gradientn(colours=c("white","#d1e5f0","#92c5de","#4393c3","#2166ac"), na.value = "grey98",limits = c(0, 100))+
#scale_fill_gradientn(colours=c("#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(10, 40))+
#scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+
theme_classic()+
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95, size=12),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
legend.title = element_blank(),
axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger
axis.title.x = element_blank(),#making the axis title larger
axis.title.y = element_blank(),
strip.text = element_text(size=12))#making the axis title larger
freq_fig_down
ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_down.pdf", plot=freq_fig_down, dpi=300, height=12, units="in", limitsize=FALSE)
## Saving 7 x 12 in image
compare_figs<-cowplot::plot_grid(freq_fig_up, freq_fig_down, nrow=2, align="v")
compare_figs
cal_freq_terms_filtered_up <- merged_up_clean %>%
filter(Number.of.terms>=20) %>%
filter(Calcification.direction=="Up")
cal_freq_terms_filtered_up
## # A tibble: 25 × 4
## ParentTerm Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
## <chr> <int> <int> <chr>
## 1 IRE1-mediated un… 41 41 Up
## 2 anatomical struc… 20 20 Up
## 3 anatomical struc… 22 22 Up
## 4 cell cycle 57 57 Up
## 5 cell projection … 21 21 Up
## 6 chondroitin sulf… 24 24 Up
## 7 fatty acid metab… 23 23 Up
## 8 innate immune re… 28 28 Up
## 9 intracellular ca… 24 24 Up
## 10 intracellular si… 33 33 Up
## # ℹ 15 more rows
## # ℹ abbreviated name: ¹Calcification.direction
#counts$Direction.of.flat.origin<- factor(counts$Direction.of.flat.origin, levels =c("up","no pattern","down"))
#counts$Module<- factor(counts$Module, levels=c("Blue","Brown","Greenyellow","Cyan","Pink","Magenta","Lightcyan","Midnight blue","Purple","Turquiose","Red","Black"))
freq_fig_up<-ggplot(cal_freq_terms_filtered_up, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms),group=1))+
#facet_wrap(~Calcification.direction, nrow = 1)+
geom_point(size=5, alpha=1, pch=21,color="black")+
geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
coord_flip()+
scale_y_continuous(expression(GO~term~counts),limits=c(0,60))+
#scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
#scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+
#scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+
theme_classic()+
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
legend.title = element_blank(),
axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger
axis.title.x = element_blank(),#making the axis title larger
axis.title.y = element_blank(),
strip.text = element_text(size=12))#making the axis title larger
freq_fig_up
ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_up_gr20.pdf", plot=freq_fig_up, dpi=300, height=12, units="in", limitsize=FALSE)
## Saving 7 x 12 in image
cal_freq_terms_filtered_down <- merged_down_clean %>%
filter(Number.of.terms>=20)
#filter(Calcification.direction=="Down")
cal_freq_terms_filtered_down
## # A tibble: 2 × 4
## ParentTerm Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
## <chr> <int> <int> <chr>
## 1 proton motive for… 26 26 Down
## 2 ubiquitin-depende… 26 26 Down
## # ℹ abbreviated name: ¹Calcification.direction
freq_fig_down<-ggplot(cal_freq_terms_filtered_down, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms)))+
#facet_wrap(~Calcification.direction, nrow = 1)+
geom_point(size=5, alpha=1, pch=21,color="black")+
geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
#geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
coord_flip()+
scale_y_continuous(expression(GO~term~counts),limits=c(0,27))+
#scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
#scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
scale_fill_gradientn(colours=c("white","#d1e5f0","#92c5de","#4393c3","#2166ac"), na.value = "grey98",limits = c(0, 100))+
#scale_fill_gradientn(colours=c("#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(10, 40))+
#scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+
theme_classic()+
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95, size=12),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
legend.title = element_blank(),
axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger
axis.title.x = element_blank(),#making the axis title larger
axis.title.y = element_blank(),
strip.text = element_text(size=12))#making the axis title larger
freq_fig_down
ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_down_gr20.pdf", plot=freq_fig_down, dpi=300, height=12, units="in", limitsize=FALSE)
## Saving 7 x 12 in image
compare_figs<-cowplot::plot_grid(freq_fig_up, freq_fig_down, nrow=2, align="v")
compare_figs
cal_freq_terms_filtered_up_all <- merged_up_clean %>%
filter(Calcification.direction=="Up")
cal_freq_terms_filtered_up_all
## # A tibble: 147 × 4
## ParentTerm Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
## <chr> <int> <int> <chr>
## 1 Arp2/3 complex-m… 13 13 Up
## 2 B cell different… 8 8 Up
## 3 BMP signaling pa… 19 19 Up
## 4 DNA topological … 7 7 Up
## 5 G protein-couple… 3 3 Up
## 6 Golgi organizati… 7 7 Up
## 7 IRE1-mediated un… 41 41 Up
## 8 L-ascorbic acid … 3 3 Up
## 9 Mo-molybdopterin… 5 5 Up
## 10 NAD metabolic pr… 4 4 Up
## # ℹ 137 more rows
## # ℹ abbreviated name: ¹Calcification.direction
#counts$Direction.of.flat.origin<- factor(counts$Direction.of.flat.origin, levels =c("up","no pattern","down"))
#counts$Module<- factor(counts$Module, levels=c("Blue","Brown","Greenyellow","Cyan","Pink","Magenta","Lightcyan","Midnight blue","Purple","Turquiose","Red","Black"))
freq_fig_up<-ggplot(cal_freq_terms_filtered_up_all, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms),group=1))+
#facet_wrap(~Calcification.direction, nrow = 1)+
geom_point(size=5, alpha=1, pch=21,color="black")+
geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
coord_flip()+
scale_y_continuous(expression(GO~term~counts),limits=c(0,60))+
#scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
#scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
scale_fill_gradientn(colours=c("white","#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(0, 100))+
#scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+
theme_classic()+
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95,size=12),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
legend.title = element_blank(),
axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger
axis.title.x = element_blank(),#making the axis title larger
axis.title.y = element_blank(),
strip.text = element_text(size=12))#making the axis title larger
freq_fig_up
ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_up_ALL.pdf", plot=freq_fig_up, dpi=300, height=17, units="in", limitsize=FALSE)
## Saving 7 x 17 in image
cal_freq_terms_filtered_down_all <- merged_down_clean %>%
filter(Calcification.direction=="Down")
cal_freq_terms_filtered_down_all
## # A tibble: 84 × 4
## ParentTerm Sum_of_SharedGOterms Number.of.terms Calcification.direct…¹
## <chr> <int> <int> <chr>
## 1 Arp2/3 complex-m… 2 2 Down
## 2 DNA topological … 2 2 Down
## 3 G protein-couple… 2 2 Down
## 4 IRE1-mediated un… 5 5 Down
## 5 NAD metabolic pr… 6 6 Down
## 6 NADH regeneration 1 1 Down
## 7 P granule assemb… 1 1 Down
## 8 actin cytoskelet… 1 1 Down
## 9 acyl-CoA metabol… 1 1 Down
## 10 anatomical struc… 4 4 Down
## # ℹ 74 more rows
## # ℹ abbreviated name: ¹Calcification.direction
freq_fig_down<-ggplot(cal_freq_terms_filtered_down_all, aes(y=Number.of.terms,x=reorder(ParentTerm, Number.of.terms)))+
#facet_wrap(~Calcification.direction, nrow = 1)+
geom_point(size=5, alpha=1, pch=21,color="black")+
geom_segment(aes(x=ParentTerm, xend=ParentTerm, y=0, yend=Number.of.terms)) +
#geom_hline(yintercept = 0, linetype="solid", color = 'black', size=0.5, show.legend = TRUE)+
coord_flip()+
scale_y_continuous(expression(GO~term~counts),limits=c(0,27))+
#scale_color_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
#scale_fill_manual("Direction.of.flat.origin",values= c("up"="#b2182b","no pattern"="grey","down" ="#67a9cf"))+
scale_fill_gradientn(colours=c("white","#d1e5f0","#92c5de","#4393c3","#2166ac"), na.value = "grey98",limits = c(0, 100))+
#scale_fill_gradientn(colours=c("#fddbc7","#f4a582","#d6604d","#b2182b"), na.value = "grey98",limits = c(10, 40))+
#scale_color_gradientn(colours=c("#b2182b","#fddbc7","white","#d1e5f0","#67a9cf", "#67a9cf", "#2166ac"), na.value = "grey98",limits = c(-0, 40))+
theme_classic()+
theme(axis.text.x=element_text(vjust=0.5, hjust=0.95, size=12),
plot.title = element_text(margin = margin(t = 10, b = 10), hjust=0.5),
panel.background= element_rect(fill=NA, color='black'),
legend.title = element_blank(),
axis.text.y = element_text(vjust=0.5, size=12), #making the axis text larger
axis.title.x = element_blank(),#making the axis title larger
axis.title.y = element_blank(),
strip.text = element_text(size=12))#making the axis title larger
freq_fig_down
ggsave(filename="../../output/WGCNA/GO_analysis/freq_fig_down_ALL.pdf", plot=freq_fig_down, dpi=300, height=17, units="in", limitsize=FALSE)
## Saving 7 x 17 in image
compare_figs<-cowplot::plot_grid(freq_fig_up, freq_fig_down, ncol=2, align="h")
compare_figs